#!/usr/bin/env python3
"""
generate_report
===============

Create a Markdown report and per-gauge bar plots from correlation_full.csv.
"""

import os
import yaml
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


# --------------------------------------------------------------------- util
def load_config(path: str) -> dict:
    """Parse the UTF-8 YAML file at ``path`` and return the loaded object.

    Uses ``yaml.safe_load``, so only plain YAML tags are constructed.
    """
    with open(path, mode="r", encoding="utf-8") as stream:
        parsed = yaml.safe_load(stream)
    return parsed


# --------------------------------------------------------------------- core
def generate_report(*, csv_path: str, report_md: str, plot_dir: str) -> None:
    """Write per-gauge-group bar plots and a Markdown correlation report.

    Reads ``csv_path``, drops rows whose ``r`` is missing, saves one bar
    chart of ``r`` against ``loop_size`` per ``gauge_group`` into
    ``plot_dir`` (named ``<group>_correlation.png``), and writes a Markdown
    file to ``report_md`` holding a summary table followed by the plots.

    The CSV must provide the columns ``gauge_group``, ``loop_size`` and
    ``r``.  If both ``r_ci_lower`` and ``r_ci_upper`` are present, error
    bars and a confidence-interval column are added; if ``p_value`` is
    present, a p-value column is added.

    Args:
        csv_path: Path of the correlation CSV to read.
        report_md: Path of the Markdown file to write.  A bare filename
            (no directory part) is written to the current directory.
        plot_dir: Directory receiving the PNG files; created if missing.
    """
    df = pd.read_csv(csv_path).dropna(subset=["r"])
    os.makedirs(plot_dir, exist_ok=True)

    has_ci = {"r_ci_lower", "r_ci_upper"}.issubset(df.columns)
    has_p = "p_value" in df.columns

    # Markdown viewers resolve image links relative to the report file, so
    # links are computed against the report's directory, not plot_dir.
    # abspath() also guarantees a non-empty directory for makedirs() below.
    report_dir = os.path.dirname(os.path.abspath(report_md))
    plots: dict[str, str] = {}

    for grp in sorted(df["gauge_group"].unique()):
        sub = df[df["gauge_group"] == grp].sort_values("loop_size")

        plot_kwargs = {
            "x": "loop_size",
            "y": "r",
            "legend": False,
            "title": f"{grp}: r vs. loop size",
            "rot": 0,
        }
        if has_ci:
            # ensure non-negative error lengths (Matplotlib requirement)
            low_err = np.clip(sub["r"] - sub["r_ci_lower"], 0, None)
            high_err = np.clip(sub["r_ci_upper"] - sub["r"], 0, None)
            plot_kwargs["yerr"] = np.vstack([low_err.values, high_err.values])

        ax = sub.plot.bar(**plot_kwargs)
        fig = ax.get_figure()
        png_path = os.path.join(plot_dir, f"{grp}_correlation.png")
        fig.savefig(png_path, dpi=150, bbox_inches="tight")
        plt.close(fig)

        try:
            link = os.path.relpath(os.path.abspath(png_path), report_dir)
        except ValueError:
            # Windows: no relative path exists between different drives.
            link = os.path.abspath(png_path)
        # Markdown/URL paths use forward slashes regardless of platform.
        plots[str(grp)] = link.replace(os.sep, "/")

    # ---------------- Markdown report ----------------
    lines: list[str] = ["# Correlation Analysis Report", ""]
    lines += [
        "This report summarises the Pearson correlation between per-link flip "
        "counts and Wilson-loop variances for each gauge group and loop size.",
        "",
        "## Summary Table",
        "",
    ]

    header = "| Gauge | Loop size | r |"
    sep = "|:----:|:---------:|:---:|"
    if has_ci:
        header += " 95 % CI |"
        sep += ":------------------:|"
    if has_p:
        header += " p-value |"
        sep += ":-------:|"
    lines += [header, sep]

    for _, row in df.iterrows():
        cells = [
            # str() so numeric group labels do not break " | ".join below
            str(row["gauge_group"]),
            str(int(row["loop_size"])),
            f"{row['r']:.3f}",
        ]
        if has_ci:
            cells.append(f"[{row['r_ci_lower']:.3f}, {row['r_ci_upper']:.3f}]")
        if has_p:
            cells.append(f"{row['p_value']:.3e}")
        lines.append("| " + " | ".join(cells) + " |")

    lines += ["", "## Correlation Plots", ""]
    for grp, link in plots.items():
        lines += [f"### {grp}", f"![{grp} correlation]({link})", ""]

    os.makedirs(report_dir, exist_ok=True)
    with open(report_md, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))


# --------------------------------------------------------------------- CLI
def main() -> None:
    """Build the report next to the CSV named in the repository config.

    The repository root is taken to be the parent of this script's
    directory; ``config.yaml`` is read from there, and both the report and
    the plots are written into the directory holding the results CSV.
    """
    script_dir = os.path.dirname(__file__)
    repo_root = os.path.abspath(os.path.join(script_dir, ".."))
    settings = load_config(os.path.join(repo_root, "config.yaml"))

    csv_path = os.path.join(repo_root, settings["results"]["output_csv"])
    output_dir = os.path.dirname(csv_path)
    report_path = os.path.join(output_dir, "report.md")

    generate_report(csv_path=csv_path, report_md=report_path, plot_dir=output_dir)
    print("Report written.")


# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
